import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3
def __iter__(self):
    """Placeholder ``__iter__`` for objects that lack one.

    It exists only so that a ``hasattr(obj, "__iter__")`` check succeeds once
    it has been bound to an object; it performs no iteration and always
    returns 0.
    """
    return 0
# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage.
# The API key is deliberately NOT embedded in the notebook: it is read from the
# IBM_COS_API_KEY environment variable so the notebook can be shared safely.
# NOTE(review): any key that was ever committed in this notebook should be
# treated as leaked and rotated in IBM Cloud IAM.
cos_api_key = os.environ.get('IBM_COS_API_KEY')
if not cos_api_key:
    raise RuntimeError(
        "Set the IBM_COS_API_KEY environment variable to your IBM Cloud "
        "Object Storage API key before running this cell."
    )

cos_client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=cos_api_key,
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.private.eu-de.cloud-object-storage.appdomain.cloud',
)

bucket = 'ctr-donotdelete-pr-kp5ryzb5c6gmwg'
object_key = 'Click.csv'

# Fetch the object; 'Body' is the streaming response that pandas reads from.
body = cos_client.get_object(Bucket=bucket, Key=object_key)['Body']

# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

df_1 = pd.read_csv(body)
# Preview the first ten rows of the click dataset.
df_1.head(n=10)
| | Daily Time Spent on Site | Age | Area Income | Daily Internet Usage | Ad Topic Line | City | Gender | Country | Timestamp | Clicked on Ad |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 62.26 | 32.0 | 69481.85 | 172.83 | Decentralized real-time circuit | Lisafort | Male | Svalbard & Jan Mayen Islands | 2016-06-09 21:43:05 | 0 |
| 1 | 41.73 | 31.0 | 61840.26 | 207.17 | Optional full-range projection | West Angelabury | Male | Singapore | 2016-01-16 17:56:05 | 0 |
| 2 | 44.40 | 30.0 | 57877.15 | 172.83 | Total 5thgeneration standardization | Reyesfurt | Female | Guadeloupe | 2016-06-29 10:50:45 | 0 |
| 3 | 59.88 | 28.0 | 56180.93 | 207.17 | Balanced empowering success | New Michael | Female | Zambia | 2016-06-21 14:32:32 | 0 |
| 4 | 49.21 | 30.0 | 54324.73 | 201.58 | Total 5thgeneration standardization | West Richard | Female | Qatar | 2016-07-21 10:54:35 | 1 |
| 5 | 51.30 | 26.0 | 51463.17 | 131.68 | Focused multi-state workforce | Port Maria | Female | Cameroon | 2016-05-15 13:18:34 | 0 |
| 6 | 66.08 | 43.0 | 73538.09 | 136.40 | Optimized upward-trending productivity | Port Jeffrey | Male | Turkey | 2016-04-03 21:13:46 | 1 |
| 7 | 36.08 | 26.0 | 74903.41 | 228.78 | Programmable uniform website | East Kevinbury | Male | French Guiana | 2016-03-11 06:49:10 | 0 |
| 8 | 46.14 | 33.0 | 43974.49 | 196.77 | Customizable tangible hierarchy | Lake Annashire | Male | Vanuatu | 2016-03-24 06:36:52 | 0 |
| 9 | 51.65 | 51.0 | 74535.94 | 188.56 | Function-based incremental standardization | Edwardmouth | Female | Cameroon | 2016-01-31 05:12:44 | 0 |
# Preview the last five rows of the click dataset.
df_1.tail(n=5)
| | Daily Time Spent on Site | Age | Area Income | Daily Internet Usage | Ad Topic Line | City | Gender | Country | Timestamp | Clicked on Ad |
|---|---|---|---|---|---|---|---|---|---|---|
| 9995 | 41.73 | 31.0 | 61840.26 | 207.17 | Profound executive flexibility | West Angelabury | Male | Singapore | 2016-01-03 03:22:15 | 1 |
| 9996 | 41.73 | 28.0 | 51501.38 | 120.49 | Managed zero tolerance concept | Kennedyfurt | Male | Luxembourg | 2016-05-28 12:20:15 | 0 |
| 9997 | 55.60 | 39.0 | 38067.08 | 124.44 | Intuitive exuding service-desk | North Randy | Female | Egypt | 2016-01-05 11:53:17 | 0 |
| 9998 | 46.61 | 50.0 | 43974.49 | 123.13 | Realigned content-based leverage | North Samantha | Female | Malawi | 2016-04-04 07:07:46 | 1 |
| 9999 | 46.61 | 43.0 | 60575.99 | 198.45 | Optimized upward-trending productivity | Port Jeffrey | Male | Northern Mariana Islands | 2016-04-03 21:13:46 | 1 |
# Print the column names, non-null counts and dtypes of the dataset.
df_1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype
---  ------                    --------------  -----
 0   Daily Time Spent on Site  10000 non-null  float64
 1   Age                       10000 non-null  float64
 2   Area Income               10000 non-null  float64
 3   Daily Internet Usage      10000 non-null  float64
 4   Ad Topic Line             10000 non-null  object
 5   City                      10000 non-null  object
 6   Gender                    10000 non-null  object
 7   Country                   10000 non-null  object
 8   Timestamp                 10000 non-null  object
 9   Clicked on Ad             10000 non-null  int64
dtypes: float64(4), int64(1), object(5)
memory usage: 781.4+ KB
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df_1.describe()
| | Daily Time Spent on Site | Age | Area Income | Daily Internet Usage | Clicked on Ad |
|---|---|---|---|---|---|
| count | 10000.000000 | 10000.000000 | 10000.000000 | 10000.000000 | 10000.000000 |
| mean | 61.660757 | 35.940100 | 53840.047721 | 177.759831 | 0.491700 |
| std | 15.704142 | 8.572973 | 13343.708718 | 40.820951 | 0.499956 |
| min | 32.600000 | 19.000000 | 13996.500000 | 105.220000 | 0.000000 |
| 25% | 48.860000 | 29.000000 | 44052.302500 | 140.150000 | 0.000000 |
| 50% | 59.590000 | 35.000000 | 56180.930000 | 178.920000 | 0.000000 |
| 75% | 76.580000 | 42.000000 | 61840.260000 | 212.670000 | 1.000000 |
| max | 90.970000 | 60.000000 | 79332.330000 | 269.960000 | 1.000000 |
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Show which plotly template is currently the default for new figures.
pio.templates.default
'plotly'
# Relabel the 0/1 target as "No"/"Yes" so plot legends are readable.
# Series.replace is used instead of Series.map so that re-running this cell is
# harmless: map() would turn the already-relabelled "No"/"Yes" values into NaN,
# whereas replace() leaves values that have no mapping untouched.
df_1['Clicked on Ad'] = df_1['Clicked on Ad'].replace({0: "No", 1: "Yes"})
def _show_click_box(frame, column, title):
    """Draw and display a box plot of ``column`` split by ad-click outcome.

    Args:
        frame: DataFrame holding ``column`` and a "Clicked on Ad" column whose
            values are "Yes"/"No".
        column: Name of the numeric column plotted on the x axis.
        title: Figure title.

    Returns:
        The plotly figure that was shown.
    """
    figure = px.box(
        frame,
        x=column,
        color="Clicked on Ad",
        title=title,
        color_discrete_map={'Yes': 'Pink', 'No': 'Blue'},
    )
    # Compute quartiles with the "exclusive" method rather than plotly's default.
    figure.update_traces(quartilemethod="exclusive")
    figure.show()
    return figure


# One box plot per numeric feature, each compared across clicked / not clicked.
for _column, _title in (
    ("Daily Time Spent on Site", "Click through rate based time spent on site"),
    ("Daily Internet Usage", "Click through rate based on Daily Internet Usage"),
    ("Age", "Click through rate based on Age"),
    ("Area Income", "Click Through Rate based on Income"),
):
    # Keep `fig` bound to the most recent figure, as the original cells did.
    fig = _show_click_box(df_1, _column, _title)
# Count how many rows fall into each "Clicked on Ad" class.
df_1['Clicked on Ad'].value_counts()
No     5083
Yes    4917
Name: Clicked on Ad, dtype: int64
# Click-through rate in percent: the share of rows where the ad was clicked.
# It is computed from the data rather than from hand-copied counts so it stays
# correct if the dataset changes. Both encodings of the target (1 and "Yes")
# are accepted, so the result does not depend on whether the column has been
# relabelled yet.
click_through_rate = float(df_1['Clicked on Ad'].isin([1, "Yes"]).mean()) * 100
print(click_through_rate)
49.17